# Imported Inequality 

# Code for replication of graphs & figures quoted in text (file 1 of 2)


# Setup -------------------------------------------------------------------

# destination folder for graphs
outpath <- paste0(highest_folder, "/figs")

## theme
my_theme <- theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.position = 'none')



# Cross-section -----------------------------------------------------------


# upload data on top shares
data_ts <- read_csv("output/ts_migrant_comb_18plus.csv")

#recode grouping variable
data_ts <- data_ts %>% 
  mutate(group = case_when(
    group == "all" ~ "all",
    group == "dummy_on" ~ "migrant",
    group == "dummy_off_noreplacem" ~ "brits_noreplace",
    group == "dummy_off_withreplacem" ~ "brits_withreplace",
    TRUE ~ NA_character_
  ))


# upload data on migrants along the income distribution
data_mig <- read_csv("output/migrants_cross_section.csv")

# rename variable
ind <- grep("prop", names(data_mig)) 
names(data_mig)[ind] <- "migrant_prop"


### Figure 2: proportion of migrants by income percentile in the cross-section

# pick a year
yy <- 2017

# keep data for that year only
dd_ts <- data_ts %>%
  filter(tax_year == yy) %>%
  filter(group == "all")
dd_mig <- data_mig %>%
  filter(tax_year == yy)


# calculate proportion of migrants for Top 1pc
dd_ts <- dd_ts %>%
  mutate(migrant_prop = migrants_count/topshare_count)

# merge datasets
dg <- bind_rows(dd_ts, dd_mig) 


# # # #

## adjust percentiles within Top 1 so that it's Top 1 excluding Top 0.1

# recalculate migrant counts in those percentiles
dg <- dg %>%
  filter(!is.na(ts)) %>%
  select(migrants_count, ts) %>%
  mutate(ts = as.character(ts*100)) %>%
  spread(key = ts, value = migrants_count, sep = "_") %>%
  mutate(migrants_count_1 = ts_1 - ts_0.1,
         migrants_count_0.1 = ts_0.1 - ts_0.01,
         migrants_count_0.01 = ts_0.01 - ts_0.001,
         migrants_count_0.001 = ts_0.001) %>% 
  select(starts_with("migrants_count")) %>% 
  gather(key = "ts", value = "migrants_count_excl") %>% 
  mutate(ts = str_replace(ts, "migrants_count_", ""),
         ts = as.numeric(ts)/100) %>%
  full_join(dg, by = "ts") %>% 
  mutate(topshare_count_excl = if_else(ts == .00001, topshare_count, round(topshare_count - topshare_count/10)))

# calculate migrant proportion
dg <- dg %>%
  mutate(migrant_prop = if_else(!is.na(ts), migrants_count_excl/topshare_count_excl, migrant_prop))

# Group percentiles into meaningful aggregates
# low end distribution; 90-95; 95-99; 99-99.9 ; 99.9-99.99; 99.99-99.999
dg <- dg %>%
  mutate(percentile_group = case_when(pctile >= 1 & pctile <= 10 ~ 1,
                                      pctile > 10 & pctile <= 20 ~ 2,
                                      pctile > 20 & pctile <= 30 ~ 3,
                                      pctile > 30 & pctile <= 40 ~ 4,
                                      pctile > 40 & pctile <= 50 ~ 5,
                                      pctile > 50 & pctile <= 60 ~ 6,
                                      pctile > 60 & pctile <= 70 ~ 7,
                                      pctile > 70 & pctile <= 80 ~ 8,
                                      pctile > 80 & pctile <= 90 ~ 9,
                                      pctile > 90 & pctile <= 99 ~ 10,
                                      ts == 0.01 ~ 11,
                                      ts == 0.001 ~ 12,
                                      ts == 0.0001 ~ 13,
                                      ts == 0.00001 ~ 14))

# # # #

# prepare our data
dg <- dg %>%
  mutate(pp = case_when(pctile >= 1 & pctile <= 10 ~ 1,
                        pctile > 10 & pctile <= 20 ~ 2,
                        pctile > 20 & pctile <= 30 ~ 3,
                        pctile > 30 & pctile <= 40 ~ 4,
                        pctile > 40 & pctile <= 50 ~ 5,
                        pctile > 50 & pctile <= 60 ~ 6,
                        pctile > 60 & pctile <= 70 ~ 7,
                        pctile > 70 & pctile <= 80 ~ 8,
                        pctile > 80 & pctile <= 83 ~ 9,
                        pctile > 83 & pctile <= 99 ~ 10))

# collapse by percentile
dgraph <- dg %>%
  group_by(pp) %>%
  summarise(migrant_prop = mean(migrant_prop))


# Fig 1 -------------------------------------------------------------------


# panel (a): plot with all percentiles

dg %>%
  filter(!is.na(pctile), pctile > 40) %>%
  mutate(highlight = if_else(pctile == 99, 15, 1)) %>% 
  ggplot(., aes(pctile, migrant_prop, group = highlight )) +
  geom_point(aes(color = as.factor(highlight),
                 size = as.factor(highlight))) +
  scale_color_manual(values = c("royalblue1", "maroon")) +
  scale_size_manual(values = c(1, 2)) +
  scale_y_continuous(limits = c(0, .25)) +
  scale_x_continuous(breaks = c(1, 20, 40, 60, 80, 100)) +
  geom_hline(yintercept = 0, color = "black", size = .1) + 
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "None") +
  xlab("Percentiles") +
  ylab("Proportion of Migrants")

# save graph
ggsave(filename = "graph_1a.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")

#Black & white version
dg %>%
  filter(!is.na(pctile), pctile > 40) %>%
  mutate(highlight = if_else(pctile == 99, 15, 1)) %>% 
  ggplot(., aes(pctile, migrant_prop, group = highlight )) +
  geom_point(aes(color = as.factor(highlight),
                 size = as.factor(highlight))) +
  scale_color_manual(values = c("grey", "black")) +
  scale_size_manual(values = c(1, 2)) +
  scale_y_continuous(limits = c(0, .25)) +
  scale_x_continuous(breaks = c(1, 20, 40, 60, 80, 100)) +
  geom_hline(yintercept = 0, color = "black", size = .1) + 
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "None") +
  xlab("Percentiles") +
  ylab("Proportion of Migrants")

# save graph
ggsave(filename = "graph_1a_bw.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


# panel (b): all percentiles from 90 and up

dg %>%
  mutate(percentile_group = case_when(pctile >= 90 & pctile < 99 ~ pctile - 89,
                                      ts == 0.01 ~ 10,
                                      ts == 0.001 ~ 11,
                                      ts == 0.0001 ~ 12,
                                      ts == 0.00001 ~ 13,
                                      TRUE ~ NA_real_),
         highlight = if_else(percentile_group >= 10, 1, 0)) %>%
  ggplot(., aes(percentile_group, migrant_prop, color = as.factor(highlight),
                size = as.factor(highlight))) +
  geom_point() +
  geom_vline(xintercept = 9.5, color = "maroon", linetype = 2) +
  geom_hline(yintercept = 0, color = "black", size = .1) + 
  scale_color_manual(values = c("royalblue1", "maroon")) +
  scale_size_manual(values = c(2, 2)) +
  scale_y_continuous(limits = c(0, .5)) +
  scale_x_continuous(breaks = 1:13,
                     labels = c("90", "91", "92", "93", "94", "95", "96", "97",
                                "98", "Top 1 - 0.1", "0.1 - 0.01",
                                "0.01 - 0.001", "Top 0.001")) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 50, size = 11, vjust = 1, hjust = 1),
        legend.position = "None") +
  xlab("Percentiles") +
  ylab("Proportion of Migrants")

# save graph
ggsave(filename = "graph_1b.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")  


#Black & white version
dg %>%
  mutate(percentile_group = case_when(pctile >= 90 & pctile < 99 ~ pctile - 89,
                                      ts == 0.01 ~ 10,
                                      ts == 0.001 ~ 11,
                                      ts == 0.0001 ~ 12,
                                      ts == 0.00001 ~ 13,
                                      TRUE ~ NA_real_),
         highlight = if_else(percentile_group >= 10, 1, 0)) %>%
  ggplot(., aes(percentile_group, migrant_prop, color = as.factor(highlight),
                size = as.factor(highlight))) +
  geom_point() +
  geom_vline(xintercept = 9.5, color = "grey80", linetype = 2) +
  geom_hline(yintercept = 0, color = "black", size = .1) + 
  scale_color_manual(values = c("grey", "black")) +
  scale_size_manual(values = c(2, 2)) +
  scale_y_continuous(limits = c(0, .5)) +
  scale_x_continuous(breaks = 1:13,
                     labels = c("90", "91", "92", "93", "94", "95", "96", "97",
                                "98", "Top 1 - 0.1", "0.1 - 0.01",
                                "0.01 - 0.001", "Top 0.001")) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 50, size = 11, vjust = 1, hjust = 1),
        legend.position = "None") +
  xlab("Percentiles") +
  ylab("Proportion of Migrants")

# save graph
ggsave(filename = "graph_1b_bw.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")  

# remove
rm(data_mig, data_ts, dd_mig, dd_ts, dg, yy)


# Fig 2 -------------------------------------------------------------------


# upload data on top shares

data_ts <- read_csv("output/ts_migrant_comb_18plus.csv")

#recode grouping variable
data_ts <- data_ts %>% 
  mutate(group = case_when(
    group == "all" ~ "all",
    group == "dummy_on" ~ "migrant",
    group == "dummy_off_noreplacem" ~ "brits_noreplace",
    group == "dummy_off_withreplacem" ~ "brits_withreplace",
    TRUE ~ NA_character_
  ))

# upload migrant proportion in the full population - 
migrant_pop <- read_csv('output/Migrant_share.csv')



#Add top share counts
for_merge <- data_ts %>% 
  filter(ts == 0.01 & group == "all") %>% 
  mutate(migrant_P99 = migrants_count/1000, #ONS data are in '000s
         count_P99 = topshare_count/1000) %>% 
  select(tax_year, migrant_P99, count_P99)

migrant_pop <- left_join(migrant_pop, for_merge)

migrant_pop <- migrant_pop %>% 
  mutate(migrant_prop_P0P99 = (Migrants-migrant_P99)/(`Total population`-count_P99),
         growth = migrant_prop_P0P99/migrant_prop_P0P99[1])




# upload NEW income control totals
tt <- read_excel('external/AdvaniSummersTarrant_UKTopIncomes_WIDTechnicalNote_Data.xlsx',
                 sheet = 'Income control', skip = 1)


tt <- tt %>% 
  mutate(tax_year = 1997:2019, 
         fiscal_income = `Pre-tax total excluding capital gains`*1000000
  ) %>% 
  select(tax_year, fiscal_income)
#Impute missing year
tt[13, 2] <- (tt[[12, 2]] + tt[[14, 2]])/2

totals <- tt


# # # # 

# creating top share
dd <- data_ts %>%
  mutate(ti_num = n*ti, # numerator
         tei_num = n*tei,
         tii_num = tii*n,
         emp_num = emp_inc*n,
         selfemp_num = selfemp_inc*n) %>%
  left_join(totals, by = 'tax_year') %>% # add control totals
  mutate(topshare_ti = ti_num/fiscal_income, # calculate share
         topshare_tei = tei_num/fiscal_income,
         topshare_tii = tii_num/fiscal_income,
         topshare_emp = emp_num/fiscal_income,
         topshare_self = selfemp_num/fiscal_income)

#Text check
#4.2: Between 1997 and 2018... the top 1% share rose XX.X% from XX.X% to XX.X%. 
#The top 0.1% (0.01%) share rose faster, growing XX.X% (XX.X%) over the period.
dd %>% filter(tax_year %in% c(1997, 2018)) %>%
  filter(ts != 0.00001 & group == "all") %>% 
  select(tax_year, ts, topshare_ti) %>% 
  pivot_wider(names_from = tax_year, values_from = topshare_ti) %>% 
  mutate(diff = `2018`/`1997`*100-100)


# save top shares (used for graph file 2)
  write_csv(dd, 'output/topshares_newti.csv')



# # # #

# calculate share of migrants (income and counts) 
dgraph <- map_dfr(c(1, 0.1, 0.01, 0.001)/100, function(x) {
  
  # part A: share of count
  pp <- data_ts %>%
    filter(ts == x) %>%
    filter(group == 'all') %>%
    mutate(prop = migrants_count/topshare_count) %>%
    select(tax_year, prop)
  
  # part B: share of average income
  py <- dd %>%
    filter(group == 'migrant' | group == 'all') %>%
    filter(ts == x) %>%
    mutate(inc_var = ti) %>%  #### <----- choose income variable here ######
    select(inc_var, group, tax_year) %>%
    spread(key = 'group', value = 'inc_var', sep = '_') %>%
    mutate(relative_avg_income = group_migrant/group_all) %>%
    select(tax_year, relative_avg_income)
  
  # part C: proportion of income (Y*N)
  pi <- dd %>%
    filter(group == 'migrant' | group == 'all') %>%
    filter(ts == x) %>%
    mutate(inc_var = ti_num) %>%  #### <----- choose income variable here ######
    select(inc_var, group, tax_year) %>%
    spread(key = 'group', value = 'inc_var', sep = '_') %>%
    mutate(prop_income = group_migrant/group_all) %>%
    select(tax_year, prop_income)
  
  # data
  dgraph <- left_join(pi, pp, by = 'tax_year')
  dgraph <- left_join(dgraph, py, by = 'tax_year')
  
  # top share
  dgraph <- dgraph %>%
    mutate(ts = x)
  
  # return
  return(dgraph)
  
})



# top share as character
dgraph <- dgraph %>%
  mutate(ts = case_when(ts == .01 ~ "Top 1",
                        ts == .001 ~ "Top 0.1",
                        ts == .0001 ~ "Top 0.01",
                        ts == .00001 ~ "Top 0.001"))

# prepare share in full population
migrant_pop <- migrant_pop %>%
  mutate(prop = migrant_prop_P0P99,
         ts = 'all') %>%
  select(tax_year, prop, ts)

# merge migrant share in full population
dgraph <- dgraph %>%
  bind_rows(migrant_pop)

# x-axis
dgraph <- dgraph %>%
  mutate(xaxis = case_when(ts == 'Top 1' ~ 1,
                           ts == 'Top 0.1' ~ 2,
                           ts == 'Top 0.01' ~ 3,
                           ts == 'Top 0.001' ~ 4,
                           ts == 'all' ~ 0),
         xaxis = as.factor(xaxis))

# categories
cat <- c('Bottom 99', 'Top 1', 'Top 0.1', 'Top 0.01', 'Top 0.001')
cat2 <- c('Top 1', 'Top 0.1', 'Top 0.01', 'Top 0.001')


#Text check
#Intro: rising by 50% for the share of migrants in the top 1%, and more than doubling for those in the top 0.01%.
dgraph %>% filter(tax_year %in% c(1997, 2018) & ts %in% c("Top 1", "Top 0.01")) %>% 
  select(tax_year, ts, prop) %>% 
  pivot_wider(names_from = tax_year, values_from = prop) %>% 
  mutate(rise = `2018`/`1997`)
#1.2: In 2017, we find that around XX.X% of all taxpayers are migrants.
dgraph %>% filter(ts == "all", tax_year == 2017) %>% select(prop)
#2.2: The share of migrants in the top 1% rose XX%.... top 0.01% and 0.001% both saw their migrant shares double
dgraph %>% filter(ts %in% c('Top 1', 'Top 0.01', 'Top 0.001')) %>% 
  filter(tax_year %in% c(1997, 2018)) %>% 
  select(ts, tax_year, prop) %>% 
  pivot_wider(names_from = tax_year, values_from = prop) %>% 
  mutate(rise = `2018`/`1997`)
#2.2: the share of top 1% (0.01%) income going to migrants rose from...
dgraph %>% filter(ts %in% c('Top 1', 'Top 0.01')) %>% 
  filter(tax_year %in% c(1997, 2018)) %>% 
  mutate(income = round(prop_income, 2)) %>% 
  select(ts, tax_year, income) %>% 
  pivot_wider(names_from = tax_year, values_from = income) 

  

# # # #

# panel (a):  all top shares on the same graph

dgraph %>%
  mutate(ts = as.character(ts)) %>%
  ggplot(., aes(tax_year, prop, group = xaxis, color = xaxis, shape = xaxis)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .5)) +
  scale_shape_manual(values = c(4, 15, 16, 17, 18),
                     labels = cat) +
  scale_color_manual(values = c('gray', 'dodgerblue', 'navy', 'maroon', 'red'),
                     labels = cat) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  xlab('Years') +
  ylab('Share of Individuals') +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

## save graph
ggsave(filename = "graph_2a.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")

#B&W version
dgraph %>%
  mutate(ts = as.character(ts)) %>%
  ggplot(., aes(tax_year, prop, group = xaxis, color = xaxis, shape = xaxis)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .5)) +
  scale_shape_manual(values = c(4, 15, 16, 17, 18),
                     labels = cat) +
  scale_color_manual(values = c('grey80', 'grey60', 'grey40', 'grey20', 'grey0'),
                     labels = cat) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  xlab('Years') +
  ylab('Share of Individuals') +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

## save graph
ggsave(filename = "graph_2a_bw.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


# panel (b): proportion of income 

dgraph %>%
  filter(!ts == 'all') %>%
  ggplot(., aes(tax_year, prop_income, group = xaxis, color = xaxis, shape = xaxis)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .5)) +
  scale_shape_manual(values = c(15, 16, 17, 18),
                     labels = cat2) +
  scale_color_manual(values = c( 'dodgerblue','navy','maroon', 'red'),
                     labels = cat2) +
  xlab('Years') +
  ylab('Share of Income') +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())


# save graph
ggsave(filename = "graph_2b.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


#B&W version
dgraph %>%
  filter(!ts == 'all') %>%
  ggplot(., aes(tax_year, prop_income, group = xaxis, color = xaxis, shape = xaxis)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .5)) +
  scale_shape_manual(values = c(15, 16, 17, 18),
                     labels = cat2) +
  scale_color_manual(values = c('grey60', 'grey40', 'grey20', 'grey0'),
                     labels = cat2) +
  xlab('Years') +
  ylab('Share of Income') +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())


# save graph
ggsave(filename = "graph_2b_bw.pdf", path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


# # # #

## normalization to 1 in 1997 for all time series

# add missing for years 1997-1999 for the overall time series
df <- tibble(tax_year = c(1997:1999),
             prop_income = rep(NA_real_, 3),
             prop = rep(NA_real_, 3),
             relative_avg_income = rep(NA_real_, 3),
             ts = rep("all", 3),
             xaxis = as.factor(rep(0, 3)))

# append
dnorm <- dgraph %>%
  bind_rows(df)

# normalization
dnorm <- dgraph %>%
  arrange(ts, tax_year) %>%
  group_by(ts) %>%
  mutate_at(vars(prop, prop_income, relative_avg_income),
            .funs = function(x) {x/x[4]})

# categories
cat <- c('Top 1', 'Top 0.1', 'Top 0.01', 'Top 0.001', 'Bottom 99')
cat2 <- c('Top 1', 'Top 0.1', 'Top 0.01', 'Top 0.001')



# Fig A4 ------------------------------------------------------------------



## Top shares in levels

# Top 1 and 0.1
dd %>%
  filter(ts == .01 | ts == .001) %>%
  filter(group == 'all') %>%
  select(tax_year, topshare_ti, ts) %>%
  mutate(ts_legend = if_else(ts == 0.01, "a", "b")) %>%
  ggplot(., aes(tax_year, topshare_ti, group = ts_legend, color = ts_legend, shape = ts_legend)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .16),
                     breaks = seq(0, .16, .04)) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  scale_shape_manual(values = c(15, 16),
                     labels = c("Top 1", "Top 0.1")) +
  scale_color_manual(values = c('dodgerblue', 'navy'),
                     labels = c("Top 1", "Top 0.1")) +
  xlab("Years") +
  ylab("Share of Income") +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank(),
        text = element_text(size = 16))

# save graph
ggsave(filename = 'top_share_1.pdf', path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


#B&W version
dd %>%
  filter(ts == .01 | ts == .001) %>%
  filter(group == 'all') %>%
  select(tax_year, topshare_ti, ts) %>%
  mutate(ts_legend = if_else(ts == 0.01, "a", "b")) %>%
  ggplot(., aes(tax_year, topshare_ti, group = ts_legend, color = ts_legend, shape = ts_legend)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .16),
                     breaks = seq(0, .16, .04)) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  scale_shape_manual(values = c(15, 16),
                     labels = c("Top 1", "Top 0.1")) +
  scale_color_manual(values = c('grey', 'black'),
                     labels = c("Top 1", "Top 0.1")) +
  xlab("Years") +
  ylab("Share of Income") +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank(),
        text = element_text(size = 16))

# save graph
ggsave(filename = 'top_share_1_bw.pdf', path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")



# Top 0.01 and 0.001
dd %>%
  filter(ts < .001) %>%
  filter(group == 'all') %>%
  select(tax_year, topshare_ti, ts) %>%
  mutate(ts_legend = if_else(ts == 0.0001, "a", "b")) %>%
  ggplot(., aes(tax_year, topshare_ti, group = ts_legend, color = ts_legend, shape = ts_legend)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .025),
                     breaks = seq(0, .025, .005)) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  scale_shape_manual(values = c(15, 16),
                     labels = c("Top 0.01", "Top 0.001")) +
  scale_color_manual(values = c('maroon', 'red'),
                     labels = c("Top 0.01", "Top 0.001")) +
  xlab("Years") +
  ylab("Share of Income") +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank(),
        text = element_text(size = 16))

# save graph
ggsave(filename = 'top_share_001.pdf', path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")




#B&W version
dd %>%
  filter(ts < .001) %>%
  filter(group == 'all') %>%
  select(tax_year, topshare_ti, ts) %>%
  mutate(ts_legend = if_else(ts == 0.0001, "a", "b")) %>%
  ggplot(., aes(tax_year, topshare_ti, group = ts_legend, color = ts_legend, shape = ts_legend)) +
  geom_point(size = 2) +
  geom_line() +
  scale_y_continuous(limits = c(0, .025),
                     breaks = seq(0, .025, .005)) +
  scale_x_continuous(breaks = seq(1997, 2018, 3)) +
  scale_shape_manual(values = c(15, 16),
                     labels = c("Top 0.01", "Top 0.001")) +
  scale_color_manual(values = c('grey', 'black'),
                     labels = c("Top 0.01", "Top 0.001")) +
  xlab("Years") +
  ylab("Share of Income") +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank(),
        text = element_text(size = 16))

# save graph
ggsave(filename = 'top_share_001_bw.pdf', path = outpath,  dpi = "retina",
       width = 16, height = 9, units = "cm")


# Growth decomposition ----------------------------------------------------


# varname
varname <- "ti"
topshare_ti <- "ti"  

# select variables
growth_dec <- dd %>%
  select(tax_year, topshare_ti, ts, group) %>%
  filter(!group == 'brits_withreplace')

# from long to wide
growth_dec <- growth_dec %>%
  spread(key = group, value = topshare_ti, sep = "_")

# top share vector
ts_vec <- c(1, 0.1, 0.01, 0.001)/100

bb <- map_dfr(ts_vec, function(x){
  
  # calculate growth
  growth_dec_calc <- growth_dec %>%
    filter(ts == x) %>%
    arrange(tax_year) %>%
    mutate(LHS = (group_all - group_all[1])/group_all[1],
           RHS_1 = (group_migrant - group_migrant[1])/group_all[1],
           RHS_2 = (group_brits_noreplace - group_brits_noreplace[1])/group_all[1]) %>%
    select(tax_year, ts, LHS, RHS_1, RHS_2)
  
})


#Fix names
bb <- bb %>% 
  mutate(All = LHS, Migrants = RHS_1, Natives = RHS_2)

# # # # 

#! May need to check y axis range

# time series graphs
map(c(1, 10, 100, 1000), function(x){
 #Respectively graphs 4b, A5a, A5b, A5c

  # data
  bb1 <- bb %>%
    filter(ts == 1/(x*100)) # %>%
    #gather(key = "group", value = "cumul", starts_with('cumul'))
  
  #bb1 %>% pivot_wider(names_from = group, values_from = cumul) %>% 
  #     write_csv(paste("decomp",x,".csv"))
  
  # y axis
  min <- min(bb1$LHS, bb1$RHS_1, bb1$RHS_2)
  max <- max(bb1$LHS, bb1$RHS_1, bb1$RHS_2)
  
  #Round each to nearest 0.1
  min <- floor(min*10)/10
  max <- ceiling(max*10)/10
  
  # if (max > 0.5) {
  #   min <- -0.25
  # }
  
  # Graph   
  bb1 %>%
    gather(key = group, value = cumul, LHS, RHS_1, RHS_2) %>%
    ggplot(., aes(tax_year, cumul, group = group, color = group)) +
    geom_line() +
    geom_point() +
    scale_color_manual(values = c('navy', 'maroon', 'dodgerblue'),
                       labels = c('All','Migrants', 'Natives')) +
    xlab('Years') +
    ylab('Cumulative Growth') +
    scale_y_continuous(limits = c(min, max)) +
    my_theme +
    theme(legend.position = 'bottom',
          legend.title = element_blank())

  # filename
  filename <- paste("growth_contribution_", topshare_ti, "_", x, '.pdf', sep = "")
  
  # save
  ggsave(filename = filename, path = outpath, dpi = 'retina',
         width = 16, height = 9, units = 'cm')


})


#B&W version
map(c(1, 10, 100, 1000), function(x){
  #Respectively graphs 4b, A5a, A5b, A5c
  
  # data
  bb1 <- bb %>%
    filter(ts == 1/(x*100)) 
  
  # y axis
  min <- min(bb1$LHS, bb1$RHS_1, bb1$RHS_2)
  max <- max(bb1$LHS, bb1$RHS_1, bb1$RHS_2)
  
  #Round each to nearest 0.1
  min <- floor(min*10)/10
  max <- ceiling(max*10)/10
  
  
  
  # Graph   
  bb1 %>%
    gather(key = group, value = cumul, All, Migrants, Natives) %>%
    ggplot(., aes(tax_year, cumul, group = group, color = group, shape = group)) +
    geom_line() +
    geom_point() +
    scale_color_manual(values = c('black', 'grey40', 'grey80')#,
                       #labels = c('All','Migrant', 'Natives')
                       ) +
    scale_shape_manual(values = c(16, 17, 18)) + 
    xlab('Years') +
    ylab('Cumulative Growth') +
    scale_y_continuous(limits = c(min, max)) +
    my_theme +
    theme(legend.position = 'bottom',
          legend.title = element_blank())
  
  # filename
  filename <- paste("growth_contribution_", topshare_ti, "_", x, '_bw.pdf', sep = "")
  
  # save
  ggsave(filename = filename, path = outpath, dpi = 'retina',
         width = 16, height = 9, units = 'cm')
  
  
})


# # # # 

# stacked bar chart
bb2 <- bb %>%
  filter(tax_year == 2018) %>%
  mutate(migrant_cont = RHS_1/LHS,
         native_cont = RHS_2/LHS) %>%
  mutate(xaxis = case_when(ts == .01 ~ 'Top 1',
                           ts == .001 ~ 'Top 0.1',
                           ts == .0001 ~ 'Top 0.01',
                           ts == .00001 ~ 'Top 0.001')) %>%
  mutate_at(vars(LHS, RHS_1, RHS_2), function(x){x*100}) %>%
  select(xaxis, LHS, RHS_1, RHS_2) %>%
  gather(key = group, value = cumul, LHS, RHS_1, RHS_2) %>%
  mutate(group = case_when(group == 'LHS' ~ "all",
                           group == "RHS_1" ~ 'Migrants',
                           group == "RHS_2" ~ 'Natives'))

# y axis
mm <- max(bb2$cumul) + 10


#Text check
#Abstract: Almost all (X%) of the observed growth in the UK top 1% income share over the past 20 years has accrued to migrants
#Intro: We show that X% of the rise in the top 1% share over the past two decades is contributed by migrants
#4.2: Migrants account for XX% of the growth in the top 1% share [from 1997 to 2018]
check <- bb2 %>% 
  filter(xaxis == "Top 1") %>% 
  filter(group != "Natives")
check[2,3]/check[1,3]
#Out of the X% increase ..., migrants accounted for...
check
#while natives contributed...
check[1,3] - check[2,3]


#4.2: Further up the distribution, migrants accounted for more than 70%
bb2 %>% 
  filter(xaxis %in% c("Top 0.1", "Top 0.01")) %>% 
  filter(group != "Natives") %>% 
  pivot_wider(names_from = group, values_from = cumul) %>% 
  mutate(check = Migrants/all*100)
#4.2: Migrants accounted for less than half of the increase... in the ten years since 2007...
bb %>% 
  filter(ts == 0.01) %>% 
  filter(tax_year %in% c(2007, 2018)) %>% 
  mutate(check = RHS_1/LHS*100)


#Graph 4a
bb2 %>%
  filter(!group == "all") %>%
  ggplot(., aes(reorder(xaxis, desc(xaxis)), cumul, fill = reorder(group, desc(group)))) +
  geom_col(position = 'stack') +
  xlab("Top Shares") +
  ylab("Cumulative Growth 1997-2018 (%)") +
  scale_fill_manual(values = c('dodgerblue', 'maroon'),
                    labels = c('Natives', 'Migrants')) +
  scale_y_continuous(limits = c(-10, 50),
                     breaks = seq(-10, 50, 10)) +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

# filename
filename <- paste('graph_4a', varname, '.pdf', sep = "")

# save
ggsave(filename = filename, path = outpath, dpi = 'retina',
       width = 16, height = 9, units = 'cm')


#B&W version
bb2 %>%
  filter(!group == "all") %>%
  ggplot(., aes(reorder(xaxis, desc(xaxis)), cumul, fill = reorder(group, desc(group)))) +
  geom_col(position = 'stack') +
  xlab("Top Shares") +
  ylab("Cumulative Growth 1997-2018 (%)") +
  scale_fill_manual(values = c('black', 'grey'),
                    labels = c('Natives', 'Migrants')) +
  scale_y_continuous(limits = c(-10, 50),
                     breaks = seq(-10, 50, 10)) +
  my_theme +
  theme(legend.position = 'bottom',
        legend.title = element_blank())

# filename
filename <- paste('graph_4a', varname, '_bw.pdf', sep = "")

# save
ggsave(filename = filename, path = outpath, dpi = 'retina',
       width = 16, height = 9, units = 'cm')


# DFL decomposition -------------------------------------------------------


#Time series data
data_ts <- read_csv("output/ts_migrant_comb_18plus.csv")
#recode grouping variable
data_ts <- data_ts %>% 
  mutate(group = case_when(
    group == "all" ~ "all",
    group == "dummy_on" ~ "migrant",
    group == "dummy_off_noreplacem" ~ "brits_noreplace",
    group == "dummy_off_withreplacem" ~ "brits_withreplace",
    TRUE ~ NA_character_
  ))

# upload NEW income control totals - from https://arunadvani.com/data/AdvaniSummersTarrant_UKTopIncomes_WIDTechnicalNote_Data.xlsx
tt <- read_excel('external/AdvaniSummersTarrant_UKTopIncomes_WIDTechnicalNote_Data.xlsx',
                 sheet = 'Income control', skip = 1)

tt <- tt %>% 
  mutate(tax_year = 1997:2019, 
         fiscal_income = `Pre-tax total excluding capital gains`*1000000
  ) %>% 
  select(tax_year, fiscal_income)

totals <- tt

#Impute missing year
totals[13, 2] <- (totals[[12, 2]] + totals[[14, 2]])/2


#Industry distribution data
data <- read_csv("output/industry_distribution.csv")

dfl <- data_ts %>% 
  filter(ts == "0.01" & (group == "all" | group == "brits_withreplace")) %>% 
  select(tax_year, ti, group, n)
totals <- totals %>% select(tax_year, fiscal_income)

dfl <- left_join(dfl, totals) %>% 
  pivot_wider(names_from = group, values_from = ti)

#Actual top shares in 1997 & 2018
dfl <- dfl %>% mutate(
  actual_top_sh = all * n / fiscal_income * 100,
  hypothetical_top_sh = brits_withreplace * n / fiscal_income * 100)


